{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#import SparkSession\n",
    "from pyspark.sql import SparkSession\n",
    "spark=SparkSession.builder.appName('binary_class').getOrCreate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "#read the dataset\n",
    "df=spark.read.csv('classification_data.csv',inferSchema=True,header=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.sql.functions import *\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(46751, 12)\n"
     ]
    }
   ],
   "source": [
    "#check the shape of the data \n",
    "print((df.count(),len(df.columns)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- loan_id: string (nullable = true)\n",
      " |-- loan_purpose: string (nullable = true)\n",
      " |-- is_first_loan: integer (nullable = true)\n",
      " |-- total_credit_card_limit: integer (nullable = true)\n",
      " |-- avg_percentage_credit_card_limit_used_last_year: double (nullable = true)\n",
      " |-- saving_amount: integer (nullable = true)\n",
      " |-- checking_amount: integer (nullable = true)\n",
      " |-- is_employed: integer (nullable = true)\n",
      " |-- yearly_salary: integer (nullable = true)\n",
      " |-- age: integer (nullable = true)\n",
      " |-- dependent_number: integer (nullable = true)\n",
      " |-- loan_defaulter: integer (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#printSchema\n",
    "df.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['loan_id',\n",
       " 'loan_purpose',\n",
       " 'is_first_loan',\n",
       " 'total_credit_card_limit',\n",
       " 'avg_percentage_credit_card_limit_used_last_year',\n",
       " 'saving_amount',\n",
       " 'checking_amount',\n",
       " 'is_employed',\n",
       " 'yearly_salary',\n",
       " 'age',\n",
       " 'dependent_number',\n",
       " 'loan_defaulter']"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#number of columns in dataset\n",
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------+------------+-------------+-----------------------+-----------------------------------------------+-------------+---------------+-----------+-------------+---+----------------+--------------+\n",
      "|loan_id|loan_purpose|is_first_loan|total_credit_card_limit|avg_percentage_credit_card_limit_used_last_year|saving_amount|checking_amount|is_employed|yearly_salary|age|dependent_number|loan_defaulter|\n",
      "+-------+------------+-------------+-----------------------+-----------------------------------------------+-------------+---------------+-----------+-------------+---+----------------+--------------+\n",
      "|    A_1|    personal|            1|                   7900|                                            0.8|         1103|           6393|          1|        16400| 42|               4|             0|\n",
      "|    A_2|    personal|            0|                   3300|                                           0.29|         2588|            832|          1|        75500| 56|               1|             0|\n",
      "|    A_3|    personal|            0|                   7600|                                            0.9|         1651|           8868|          1|        59000| 46|               1|             0|\n",
      "|    A_4|    personal|            1|                   3400|                                           0.38|         1269|           6863|          1|        26000| 55|               8|             0|\n",
      "|    A_5|   emergency|            0|                   2600|                                           0.89|         1310|           3423|          1|         9700| 41|               4|             1|\n",
      "+-------+------------+-------------+-----------------------+-----------------------------------------------+-------------+---------------+-----------+-------------+---+----------------+--------------+\n",
      "only showing top 5 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#view the dataset\n",
    "df.show(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------+-------+------------+------------------+-----------------------+-----------------------------------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------+\n",
      "|summary|loan_id|loan_purpose|     is_first_loan|total_credit_card_limit|avg_percentage_credit_card_limit_used_last_year|     saving_amount|   checking_amount|       is_employed|     yearly_salary|               age|  dependent_number|     loan_defaulter|\n",
      "+-------+-------+------------+------------------+-----------------------+-----------------------------------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------+\n",
      "|  count|  46751|       46751|             46751|                  46751|                                          46751|             46751|             46751|             46751|             46751|             46751|             46751|              46751|\n",
      "|   mean|   null|        null|0.5414429637868708|      4615.304485465552|                              0.700091121045545| 2037.636585313683|3520.6714294881394|0.9173279715941905| 29527.62079955509| 41.53979594019379|3.7448396825736348|0.34653804196701676|\n",
      "| stddev|   null|        null|0.4982848498677868|      1890.194453628314|                             0.1777288093267152|1498.6710906030362|2160.9332423713727|0.2753887911928983|16149.757703029438|12.817646350266434|2.6191527902107667|0.47587211651314887|\n",
      "|    min|    A_1|   emergency|                 0|                    500|                                            0.0|                 0|                 0|                 0|                 0|                18|                 0|                  0|\n",
      "|    max| A_9999|    property|                 1|                  13500|                                           1.09|             10641|             13165|                 1|             97200|                79|                 8|                  1|\n",
      "+-------+-------+------------+------------------+-----------------------+-----------------------------------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#Exploratory Data Analysis\n",
    "df.describe().show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------+-----+\n",
      "|loan_defaulter|count|\n",
      "+--------------+-----+\n",
      "|             1|16201|\n",
      "|             0|30550|\n",
      "+--------------+-----+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.groupBy('loan_defaulter').count().show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------------+-----+\n",
      "|loan_purpose|count|\n",
      "+------------+-----+\n",
      "|      others| 6763|\n",
      "|   emergency| 7562|\n",
      "|    property|11388|\n",
      "|  operations|10580|\n",
      "|    personal|10458|\n",
      "+------------+-----+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.groupBy('loan_purpose').count().show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
    "#converting categorical data to numerical form"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "#import required libraries\n",
    "from pyspark.ml.feature import OneHotEncoder, StringIndexer, VectorAssembler\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "loan_purpose_indexer = StringIndexer(inputCol=\"loan_purpose\", outputCol=\"loan_purpose\").fit(df)\n",
    "df = loan_purpose_indexer.transform(df)\n",
    "loan_encoder = OneHotEncoder(inputCol=\"loan_index\", outputCol=\"loan_purpose_vec\")\n",
    "df = loan_encoder.transform(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------------+------------+----------------+\n",
      "|loan_purpose|loan_purpose|loan_purpose_vec|\n",
      "+------------+------------+----------------+\n",
      "|personal    |personal    |(4,[2],[1.0])   |\n",
      "|personal    |personal    |(4,[2],[1.0])   |\n",
      "|personal    |personal    |(4,[2],[1.0])   |\n",
      "+------------+------------+----------------+\n",
      "only showing top 3 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.select(['loan_purpose','loan_purpose','loan_purpose_vec']).show(3,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.ml.feature import VectorAssembler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['loan_id',\n",
       " 'loan_purpose',\n",
       " 'is_first_loan',\n",
       " 'total_credit_card_limit',\n",
       " 'avg_percentage_credit_card_limit_used_last_year',\n",
       " 'saving_amount',\n",
       " 'checking_amount',\n",
       " 'is_employed',\n",
       " 'yearly_salary',\n",
       " 'age',\n",
       " 'dependent_number',\n",
       " 'loan_defaulter',\n",
       " 'loan_index',\n",
       " 'loan_purpose_vec']"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_assembler = VectorAssembler(inputCols=['is_first_loan',\n",
    " 'total_credit_card_limit',\n",
    " 'avg_percentage_credit_card_limit_used_last_year',\n",
    " 'saving_amount',\n",
    " 'checking_amount',\n",
    " 'is_employed',\n",
    " 'yearly_salary',\n",
    " 'age',\n",
    " 'dependent_number',\n",
    " 'loan_purpose_vec'], outputCol=\"features\")\n",
    "df = df_assembler.transform(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- loan_id: string (nullable = true)\n",
      " |-- loan_purpose: string (nullable = true)\n",
      " |-- is_first_loan: integer (nullable = true)\n",
      " |-- total_credit_card_limit: integer (nullable = true)\n",
      " |-- avg_percentage_credit_card_limit_used_last_year: double (nullable = true)\n",
      " |-- saving_amount: integer (nullable = true)\n",
      " |-- checking_amount: integer (nullable = true)\n",
      " |-- is_employed: integer (nullable = true)\n",
      " |-- yearly_salary: integer (nullable = true)\n",
      " |-- age: integer (nullable = true)\n",
      " |-- dependent_number: integer (nullable = true)\n",
      " |-- loan_defaulter: integer (nullable = true)\n",
      " |-- loan_index: double (nullable = false)\n",
      " |-- loan_purpose_vec: vector (nullable = true)\n",
      " |-- features: vector (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------------------------------------------------------+--------------+\n",
      "|features                                                            |loan_defaulter|\n",
      "+--------------------------------------------------------------------+--------------+\n",
      "|[1.0,7900.0,0.8,1103.0,6393.0,1.0,16400.0,42.0,4.0,0.0,0.0,1.0,0.0] |0             |\n",
      "|[0.0,3300.0,0.29,2588.0,832.0,1.0,75500.0,56.0,1.0,0.0,0.0,1.0,0.0] |0             |\n",
      "|[0.0,7600.0,0.9,1651.0,8868.0,1.0,59000.0,46.0,1.0,0.0,0.0,1.0,0.0] |0             |\n",
      "|[1.0,3400.0,0.38,1269.0,6863.0,1.0,26000.0,55.0,8.0,0.0,0.0,1.0,0.0]|0             |\n",
      "|[0.0,2600.0,0.89,1310.0,3423.0,1.0,9700.0,41.0,4.0,0.0,0.0,0.0,1.0] |1             |\n",
      "|[0.0,7600.0,0.51,1040.0,2406.0,1.0,22900.0,52.0,0.0,0.0,1.0,0.0,0.0]|0             |\n",
      "|[1.0,6900.0,0.82,2408.0,5556.0,1.0,34800.0,48.0,4.0,0.0,1.0,0.0,0.0]|0             |\n",
      "|[0.0,5700.0,0.56,1933.0,4139.0,1.0,32500.0,64.0,2.0,0.0,0.0,1.0,0.0]|0             |\n",
      "|[1.0,3400.0,0.95,3866.0,4131.0,1.0,13300.0,23.0,3.0,0.0,0.0,1.0,0.0]|0             |\n",
      "|[0.0,2900.0,0.91,88.0,2725.0,1.0,21100.0,52.0,1.0,0.0,0.0,1.0,0.0]  |1             |\n",
      "+--------------------------------------------------------------------+--------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.select(['features','loan_defaulter']).show(10,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "#select data for building model\n",
    "model_df=df.select(['features','loan_defaulter'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.ml.classification import LogisticRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "#split the data \n",
    "training_df,test_df=model_df.randomSplit([0.75,0.25])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "34958"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "training_df.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------+-----+\n",
      "|loan_defaulter|count|\n",
      "+--------------+-----+\n",
      "|             1|12048|\n",
      "|             0|22910|\n",
      "+--------------+-----+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "training_df.groupBy('loan_defaulter').count().show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11793"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_df.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------+-----+\n",
      "|loan_defaulter|count|\n",
      "+--------------+-----+\n",
      "|             1| 4153|\n",
      "|             0| 7640|\n",
      "+--------------+-----+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "test_df.groupBy('loan_defaulter').count().show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "log_reg=LogisticRegression(labelCol='loan_defaulter').fit(training_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Training Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "lr_summary=log_reg.summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8939298586875679"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lr_summary.accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9587456481363935"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lr_summary.areaUnderROC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.9233245149911816, 0.8396318618667535]\n"
     ]
    }
   ],
   "source": [
    "print(lr_summary.precisionByLabel)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.914054997817547, 0.8556606905710491]\n"
     ]
    }
   ],
   "source": [
    "print(lr_summary.recallByLabel)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+--------------+--------------------+--------------------+----------+\n",
      "|            features|loan_defaulter|       rawPrediction|         probability|prediction|\n",
      "+--------------------+--------------+--------------------+--------------------+----------+\n",
      "|(13,[0,1,2,3,4,7]...|             1|[-3.4630360774167...|[0.03038246469741...|       1.0|\n",
      "|(13,[0,1,2,3,4,7]...|             1|[-5.5391195110590...|[0.00391460129742...|       1.0|\n",
      "|(13,[0,1,2,3,4,7]...|             0|[1.00238593296486...|[0.73152742283114...|       0.0|\n",
      "|(13,[0,1,2,3,4,7]...|             1|[-1.8290704519648...|[0.13834904603406...|       1.0|\n",
      "|(13,[0,1,2,3,4,7]...|             1|[-1.5501728962289...|[0.17506129798003...|       1.0|\n",
      "|(13,[0,1,2,3,4,7]...|             0|[6.60737916543425...|[0.99865145442765...|       0.0|\n",
      "|(13,[0,1,2,3,4,7]...|             0|[7.50587822302399...|[0.99945045940723...|       0.0|\n",
      "|(13,[0,1,2,3,4,7,...|             1|[-4.4555325192703...|[0.01148079400371...|       1.0|\n",
      "|(13,[0,1,2,3,4,7,...|             1|[-4.5326784954285...|[0.01063746639570...|       1.0|\n",
      "|(13,[0,1,2,3,4,7,...|             1|[-4.9717163244463...|[0.00688353015038...|       1.0|\n",
      "+--------------------+--------------+--------------------+--------------------+----------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "predictions = log_reg.transform(test_df)\n",
    "predictions.show(10)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['features', 'loan_defaulter', 'rawPrediction', 'probability', 'prediction']"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_predictions = log_reg.transform(test_df)\n",
    "model_predictions.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_predictions = log_reg.evaluate(test_df)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8945984906300347"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_predictions.accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8951909857782705"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_predictions.weightedPrecision"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0.9129581151832461, 0.8608235010835541]"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_predictions.recallByLabel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.9234741162452006, 0.8431603773584906]\n"
     ]
    }
   ],
   "source": [
    "print(model_predictions.precisionByLabel)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9594316478468224"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_predictions.areaUnderROC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.ml.classification import RandomForestClassifier\n",
    "rf = RandomForestClassifier(numTrees=50,maxDepth=30,labelCol='loan_defaulter')\n",
    "rf_model = rf.fit(training_df)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_predictions = rf_model.transform(test_df)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "true_pos=model_predictions.filter(model_predictions['loan_defaulter']==1).filter(model_predictions['prediction']==1).count()\n",
    "actual_pos=model_predictions.filter(model_predictions['loan_defaulter']==1).count()\n",
    "pred_pos=model_predictions.filter(model_predictions['prediction']==1).count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8979051288225379"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Recall \n",
    "float(true_pos)/(actual_pos)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8660009289363678"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Precision on test Data \n",
    "float(true_pos)/(pred_pos)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
